const puppeteer = require('puppeteer-core');
const util = require('util');
const fs = require('fs').promises;
const path = require('path');
const config = require('../../config/index')

// Command-line arguments that follow the node binary and the script path.
// slice() leaves process.argv untouched (splice() would mutate it), and the
// name avoids shadowing the reserved `arguments` binding, which is a
// SyntaxError in strict mode / ES modules.
const cliArgs = process.argv.slice(2);
// URL of the page to crawl: the first command-line argument.
const TARGET_URL = cliArgs[0];

// Base directory for cookie files, taken from the project config.
const MY_PATH = config.cookieFilePath;
// NOTE: despite the name this is a file path, not a directory: it holds the
// cookies that are written back after each crawl.
const COOKIE_DIR = path.resolve(MY_PATH, 'zhipin/list.json');
// Default cookie file; it is applied before the persisted cookies above.
const DEFULTE_COOKIE = path.resolve(MY_PATH, 'zhipin/defult.json');

// Milliseconds to wait after navigation before cookies are saved and the page is read.
const DELAY = 5000;

// Selector for the "next page" button.
const selector = "#main > div > div.job-list > div.page > a.next";

/**
 * Build the result envelope that this script prints on stdout as JSON.
 *
 * @param {string} code - Status code, e.g. "success" or "error".
 * @param {string} message - Human-readable status text.
 * @param {...*} args - Optional payload: args[0] is the page HTML string,
 *   args[1] is the URL of the next page.
 * @returns {{code: string, message: string, data?: {htmlString: *, nextUrl: *}}}
 *   The envelope; `data` is present only when at least one payload argument
 *   was supplied.
 */
function message(code, message, ...args) {
    const result = {
        "code": code,
        "message": message
    };

    // A rest parameter is always an array, and an empty array is truthy, so
    // the payload's presence has to be checked via its length.
    if (args.length > 0) {
        const [htmlString, nextUrl] = args;
        result.data = {
            "htmlString": htmlString,
            "nextUrl": nextUrl
        };
    }
    return result;
}

/**
 * Resolve after the given number of milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Best-effort: read a JSON cookie file and apply its cookies to the page.
 * A missing or broken cookie file must never abort the crawl.
 *
 * @param {object} page - Puppeteer page to receive the cookies.
 * @param {string} cookieFile - Path of the JSON cookie file.
 * @returns {Promise<void>}
 */
async function loadCookies(page, cookieFile) {
    try {
        const cookiesJson = await fs.readFile(cookieFile);
        const cookies = JSON.parse(cookiesJson);
        await page.setCookie(...cookies);
    } catch (error) {
        // A cookie file that does not exist yet is expected; anything else
        // is reported on stderr so stdout stays reserved for the JSON result.
        if (!error || error.code !== 'ENOENT') {
            console.error(util.format("failed to load cookies from %s: %s", cookieFile, error));
        }
    }
}

/**
 * Best-effort: write the page's current cookies to a JSON file.
 *
 * @param {object} page - Puppeteer page whose cookies are saved.
 * @param {string} cookieFile - Path of the JSON cookie file to overwrite.
 * @returns {Promise<void>}
 */
async function saveCookies(page, cookieFile) {
    try {
        const cookies = await page.cookies();
        await fs.writeFile(cookieFile, JSON.stringify(cookies), {"flag": "w"});
    } catch (error) {
        console.error(util.format("failed to save cookies to %s: %s", cookieFile, error));
    }
}

/**
 * Launch the browser, visit targetUrl and print one JSON result on stdout.
 * The browser is closed even when a step in between throws.
 *
 * @param {string} targetUrl - URL of the page to crawl.
 * @returns {Promise<void>}
 */
async function crawlPage(targetUrl) {
    const browser = await puppeteer.launch({
        executablePath: config.chromePath,
        args: [
            '--disable-dev-shm-usage',
            '--no-sandbox',
            '--disable-setuid-sandbox',
            '--disable-gpu'
        ],
        ignoreDefaultArgs: ["--enable-automation"]
    });

    try {
        const page = await browser.newPage();

        // Remove Chromium's `webdriver` property from navigator in every new document.
        await page.evaluateOnNewDocument(() => {
            delete navigator.__proto__.webdriver;
        });

        // Extra HTTP headers; they are sent with every request the page makes.
        await page.setExtraHTTPHeaders({
            'Pragma': 'no-cache',
            'Cache-Control': 'no-cache',
            'DNT': '1',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
        });

        // Override the UA; set this way, the header name is sent capitalised as 'User-Agent'.
        await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36');

        // Default cookies first, then the cookies persisted by the previous run.
        await loadCookies(page, DEFULTE_COOKIE);
        await loadCookies(page, COOKIE_DIR);

        let navigated = true;
        try {
            await page.goto(targetUrl);
        } catch (error) {
            const failure = message("error", util.format("浏览器访问错误\n错误url: %s",targetUrl), "" ,"");
            console.log(JSON.stringify(failure));
            navigated = false;
        }

        // Wait before persisting cookies and reading the page. A plain timer
        // is used instead of the deprecated page.waitFor().
        await sleep(DELAY);
        await saveCookies(page, COOKIE_DIR);

        if (navigated) {
            let htmlString;
            let outcome;
            try {
                htmlString = await page.content();
                // $eval rejects when the selector matches nothing, which is
                // how a page without a "next" link is recognised.
                const nextUrl = await page.$eval(selector, el => el.href);
                outcome = message("success", "爬取完成", htmlString, nextUrl);
            } catch {
                outcome = message("success", "没有下一页", htmlString, "");
            }
            console.log(JSON.stringify(outcome));
        }
    } finally {
        // Always release the Chromium process, including on unexpected errors.
        await browser.close();
    }
}

/**
 * Crawl one page and print the result as a JSON line on stdout.
 *
 * On a navigation error an "error" message is printed; otherwise a "success"
 * message carrying the page HTML and the next-page URL (empty string when
 * there is none). Unexpected failures, e.g. the browser failing to launch,
 * are reported on stderr and set a non-zero exit code instead of surfacing
 * as an unhandled promise rejection.
 *
 * @param {string} targetUrl - URL of the page to crawl.
 * @returns {Promise<void>} Settles when the crawl has finished; never rejects.
 */
function startCrawl(targetUrl) {
    return crawlPage(targetUrl).catch((error) => {
        console.error(error);
        process.exitCode = 1;
    });
}

// Entry point: crawl the URL supplied as the first command-line argument.
startCrawl(TARGET_URL);